rm(list = ls())
here::i_am(file.path("code", "14_matching_robustness.R"))
library(here)
source(here("code", "config.R"))

#################################################################################
# Make analysis data

census <- read_parquet(here("data", "analysis", "census.parquet"))
cards <- read_parquet(here("data", "analysis", "cards.parquet"))
matches <- read_parquet(here("data", "analysis", "cards_census_matches.parquet")) |>
  pivot_longer(-card_id, names_to = "match_type", values_to = "census_id") |>
  drop_na()

est_df <- cards |>
  left_join(
    matches,
    by = "card_id",
    multiple = "all"
  ) |>
  left_join(
    census,
    by = "census_id"
  ) |>
  mutate(
    vet_combined = as.integer(vet_cards | vet_c1930),
    is_naacp = replace_na(is_naacp, FALSE)
  )

#################################################################################
# Estimate baseline regression for various matching methods,
# first x = 0, then x = 2

for (x in c(0, 2)) {
  
  match_types <- paste0(c(
    "standard_match_",
    "unique_prewar_match_",
    "unique_prewar_vet_match_",
    "unique_any_match_"
  ), x)
  
  match_rates <- est_df |>
    count(match_type) |>
    filter(match_type %in% match_types) |>
    mutate(type = factor(match_type, levels = match_types)) |>
    arrange(type) |>
    mutate(
      rate = n / sum(cards$eligible_for_match_census)
    ) |>
    pull(rate)
  
  ols <- feols(
    is_naacp ~ vet_combined |
      birthyr_cards + bpl_cards + board_identifier +
      exemption^married_cards + farmer + laborer + farm_laborer,
    data = est_df |>
      filter(
        match_type %in% match_types
      ) |>
      mutate(
        match_type = factor(
          match_type,
          levels = match_types
        )
      ),
    split = ~ match_type,
    vcov = "hetero"
  )
  
  my_nonvet <- ols |>
    map_dbl(\(x) {
      rhs <- model.matrix(x)
      lhs <-  model.matrix(x, type = "lhs")
      mean(lhs[rhs == 0])
    }) |>
    as.numeric()
  
  ols_coefs <- sprintf(
    "%.04f",
    ols |> map_dfr(broom::tidy) |> pull(estimate)
  )
  
  ols_t <- sprintf(
    "%.02f",
    ols |> map_dfr(broom::tidy) |> pull(statistic)
  )
  
  iv <- feols(
    is_naacp ~ 1 |
      birthyr_cards + bpl_cards + board_identifier +
      exemption^married_cards + farmer + laborer + farm_laborer |
      vet_combined ~ order_num_serial_norm,
    data = est_df |>
      filter(
        match_type %in% match_types
      ) |>
      mutate(
        match_type = factor(
          match_type,
          levels = match_types
        )
      ),
    split = ~ match_type,
    cluster = ~ serial_num
  )
  
  etable(
    iv,
    tex = TRUE,
    signif.code = c("***" = 0.01, "**" = 0.05, "*" = 0.10),
    fitstat = ~ n + r2 + ivwald,
    replace = TRUE,
    file = file.path(tab_dir, str_glue("matching_robustness_{x}.tex")),
    digits.stats = 3,
    fixef.group =  list(
      "Birth year and state" = c("Birth year", "Birth state"),
      "-_Prewar occupation" = c("Farmer", "Laborer", "Farm laborer")
    ),
    headers = list(
      "Tie-breaking" = c("None", "Prewar", "+Veteran", "+County")
    ),
    extralines = list(
      "__Linking rate" = sprintf("%.03f", match_rates),
      "__Dep. var. mean (nonveterans)" = my_nonvet,
      "__OLS coefficient" = ols_coefs,
      "__OLS $t$-statistic" = ols_t
    ),
    postprocess.tex = function(x) { 
      x <- gsub("Wald (1st stage), Veteran", "First stage $F$-statistic", x, fixed = TRUE) 
      x <- gsub("Exemption-Married", "Exemption $\\times$ married", x, fixed = TRUE) 
      return(x)
    }
  )
}
